# Importando bibliotecas que serão utilizadas
library(dplyr)
library(purrr)
library(stats)
library(plotly)
library(stringr)
library(janitor)
library(ggplot2)
source("./source/multiplot.R")
# Carregando a base de Video Games e limpando com o script fornecido pela fonte
# Location of the raw PC games CSV (file name indicates 2004-2018 coverage).
url <- "https://raw.githubusercontent.com/lizawood/apps-and-games/master/PC_Games/PCgames_2004_2018_raw.csv"

# Download the CSV and normalise its column names with janitor::clean_names();
# the cleaning step that follows relies on those normalised names.
raw_df <- janitor::clean_names(read.csv(url))
# Build the analysis table from the raw download.
#
# - `price` is coerced to numeric (values that do not parse become NA).
# - `playtime_median` is split on whitespace: word 1 becomes the average
#   playtime, word 2 (with its parentheses stripped) the median playtime.
#   Each is then converted as 60 * chars 1-2 + chars 4-5, i.e. presumably
#   "HH:MM" -> minutes (TODO confirm the raw format).
# - `metascore` is read from the 4th-to-3rd last characters of the combined
#   score column.
# The combined/helper columns are dropped and the `_s` suffixed columns renamed.
data <- raw_df %>%
  dplyr::mutate(
    price = as.numeric(price),
    score_rank = stringr::word(score_rank_userscore_metascore, 1),
    average_playtime = stringr::word(playtime_median, 1),
    # Strip the opening and then the closing parenthesis around the median.
    median_playtime = stringr::str_remove(
      stringr::str_remove(stringr::word(playtime_median, 2), "\\("),
      "\\)"
    )
  ) %>%
  dplyr::mutate(
    average_playtime =
      60 * as.numeric(stringr::str_sub(average_playtime, 1, 2)) +
      as.numeric(stringr::str_sub(average_playtime, 4, 5)),
    median_playtime =
      60 * as.numeric(stringr::str_sub(median_playtime, 1, 2)) +
      as.numeric(stringr::str_sub(median_playtime, 4, 5)),
    metascore = as.double(
      stringr::str_sub(score_rank_userscore_metascore, start = -4, end = -3)
    )
  ) %>%
  dplyr::select(
    -score_rank_userscore_metascore,
    -score_rank,
    -playtime_median
  ) %>%
  dplyr::rename(
    publisher = publisher_s,
    developer = developer_s
  )
# Filtrando desenvolvedoras com pelo menos 20 jogos
# Number of rows (games) per developer, keeping only developers that are not
# missing and have at least 20 games.
count_dev <- data %>%
  dplyr::group_by(developer) %>%
  dplyr::summarise(cont = dplyr::n()) %>%
  # `cont` is a row count and can never be NA, so dropping incomplete rows
  # amounts to dropping the group whose developer is NA.
  dplyr::filter(!is.na(developer), cont >= 20)

# Restrict the working table to games from those developers.
data <- dplyr::filter(data, developer %in% count_dev[["developer"]])
# Plotando preço médio dos jogos dos 10 desenvolvedores com maiores quantidades de horas jogadas
# Mean price and mean playtime per developer, keeping the 10 developers with
# the highest mean playtime.
#
# `price` and `average_playtime` are produced upstream by coercing text with
# as.numeric(), so rows that fail to parse are NA. The means therefore use
# na.rm = TRUE: without it a single NA row turns the developer's whole mean
# into NA, and arrange() sorts NA last, so that developer could never reach
# the top 10. A developer whose values are all NA yields NaN.
developers_price <- data %>%
  dplyr::group_by(developer) %>%
  dplyr::summarise(
    price = round(mean(price, na.rm = TRUE), 2),
    # Divided by 60 -- presumably minutes to hours; confirm against the
    # cleaning step that builds average_playtime.
    playtime = as.double(mean(average_playtime, na.rm = TRUE) / 60)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(developer = as.factor(developer)) %>%
  dplyr::arrange(dplyr::desc(playtime)) %>%
  dplyr::slice(1:10)
# Bar chart of the mean price for each developer in `developers_price`,
# one bar per developer, filled by developer and outlined in black.
p <- ggplot(
  developers_price,
  aes(x = developer, y = price, fill = developer)
) +
  geom_bar(stat = "identity", colour = "black") +
  labs(
    title = "Preço jogos - Top 10 desenvolvedores horas jogadas",
    x = "Desenvolvedores",
    y = "Preço U$"
  )

# Render the static ggplot as an interactive plotly figure.
ggplotly(p)
# Evolução do preço (U$) dos jogos dos 10 desenvolvedores com maiores quantidades de horas jogadas ao longo dos anos
# Mean price and mean playtime per developer and release year, restricted to
# the developers present in `developers_price`.
#
# The year is the first run of four digits found in `release_date`.
# Means use na.rm = TRUE because `price` and `average_playtime` come from
# as.numeric() coercion of text upstream and can contain NA; without it one
# NA row would blank out the whole developer/year cell. A cell whose values
# are all NA yields NaN.
developers_price_evolution <- data %>%
  dplyr::mutate(
    year = stringr::str_extract(release_date, regex("\\d{4}"))
  ) %>%
  dplyr::group_by(developer, year) %>%
  dplyr::summarise(
    price = round(mean(price, na.rm = TRUE), 2),
    # Divided by 60 -- presumably minutes to hours; confirm against the
    # cleaning step that builds average_playtime.
    playtime = as.double(mean(average_playtime, na.rm = TRUE) / 60)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    developer = as.factor(developer),
    year = as.integer(year)
  ) %>%
  dplyr::filter(developer %in% developers_price$developer)